In this R markdown document, I delineate, analyze and calculate various settlement hierarchy metrics and then reorganize the data and export for script #7
All of the data and scripts are downloadable from the new ASU SettlementPersist2022 github repository, which can be downloaded locally as a .zip folder or cloned to your own account.
Either way, once you have done so, you will need to modify the working directory (setwd(“C:/…)”) path and “dir” variables in the code chunk below to match the repository location on your computer.
wd <- list()
# SET YOUR LOCAL DIRECTORY LOCATION HERE:
wd$dir <- "C:/Users/rcesaret/Dropbox (ASU)/ASUSettlementPersist2022/"
# wd$dir <- 'C:/Users/TJ McMote/Dropbox (ASU)/ASUSettlementPersist2022'
wd$analysis <- paste0(wd$dir, "analysis/")
wd$data_r <- paste0(wd$dir, "data-raw/")
wd$data_p <- paste0(wd$dir, "data-processed/")
wd$data_f <- paste0(wd$dir, "data-final-outputs/")
wd$figs <- paste0(wd$dir, "figures/")
wd$funcs <- paste0(wd$dir, "functions/")
# Package names
packages <- c("rgdal", "rgeos", "sp", "sf", "GISTools", "raster", "Matrix",
"gdistance", "lwgeom", "tidyverse", "tidyr", "classInt", "pacman", "RColorBrewer",
"cowplot", "stars", "ggnewscale", "broom", "zoo", "lmtest", "sandwich",
"lctools", "REAT", "ineq")
# , 'data.table', 'mgcv','igraph', 'ggrepel','ggridges', 'movecost',
# 'datplot', 'scales',
# Install packages not yet installed
installed_packages <- packages %in% rownames(installed.packages())
if (any(installed_packages == FALSE)) {
install.packages(packages[!installed_packages])
}
# load packages
invisible(lapply(packages, library, character.only = TRUE))
rm(packages, installed_packages)
# Read in custom R functions located in the wd$funcs directory folder
FUNCS <- list("splitByAttributes.R", "RS_Acoef.R", "PopAreaResids.R", "LQ.R",
"GiniSp.R")
invisible(lapply(FUNCS, function(x) source(paste0(wd$funcs, x))))
rm(FUNCS)
Data we are importing:
# Agg Site and catchment polygon data
All_AggPoly <- readOGR(paste0(wd$data_p, "SBOM_AggSitePoly5.gpkg"))
## OGR data source with driver: GPKG
## Source: "C:\Users\rcesaret\Dropbox (ASU)\ASUSettlementPersist2022\data-processed\SBOM_AggSitePoly5.gpkg", layer: "SBOM_AggSitePoly5"
## with 1417 features
## It has 282 fields
All_CatchPoly <- readOGR(paste0(wd$data_p, "SBOM_CatchPoly5.gpkg"))
## OGR data source with driver: GPKG
## Source: "C:\Users\rcesaret\Dropbox (ASU)\ASUSettlementPersist2022\data-processed\SBOM_CatchPoly5.gpkg", layer: "SBOM_CatchPoly5"
## with 1417 features
## It has 282 fields
# Catchment boundary limit polygon
CatchLims <- readOGR(paste0(wd$data_r, "CatchLims.gpkg"))
## OGR data source with driver: GPKG
## Source: "C:\Users\rcesaret\Dropbox (ASU)\ASUSettlementPersist2022\data-raw\CatchLims.gpkg", layer: "CatchLims"
## with 1 features
## It has 89 fields
CatchLims.sf = st_as_sf(CatchLims) #for ggplot
## Hillshade Basemap Raster with lake
HillshadeLake <- raster(paste0(wd$data_r, "HillshadeLake.tif"))
HillshadeLake <- rast(HillshadeLake, crs = 26914)
Hillshade.s <- st_as_stars(HillshadeLake) #for ggplot basemap
# Cost-distance matrices
ord <- c("EF", "EF_MF", "MF", "MF_LF", "LF", "LF_TF", "TF", "TF_CL", "CL", "CL_ET",
"ET", "ET_LTAzI", "LTAzI", "LTAzI_EA", "EA", "EA_LA", "LA")
temp = list.files(path = paste0(wd$data_p, "CDMatrices/"), full.names = TRUE)
nam.distmat = list.files(path = paste0(wd$data_p, "CDMatrices/"), full.names = F)
nam.distmat = gsub("_cdmat.csv", "", nam.distmat)
CD.mats = lapply(temp, read.csv, header = TRUE, row.names = 1)
CD.mats = lapply(CD.mats, as.matrix)
CD.mats = lapply(CD.mats, function(x) `colnames<-`(x, rownames(x)))
names(CD.mats) <- nam.distmat
CD.mats <- CD.mats[ord]
names(CD.mats) <- paste0(ord, "_cdmat")
TrsprtNet_CDmatList <- readRDS(file = paste0(wd$data_p, "TrsprtNet_CDmatList.RData"))
# TrsprtNet_CDmatList <-
# readRDS(file=paste0(wd$data_p,'TrsprtNet_CDmatList.RData'))
# reorder spatial points dataframe by period
All_AggPoly <- All_AggPoly[order(All_AggPoly$PeriodNum), ]
All_CatchPoly <- All_CatchPoly[order(All_CatchPoly$PeriodNum), ]
# Split polygons by Phase, saved as list of SPDF
Poly_List <- splitByAttributes(spdata = All_AggPoly, attr = "Period", suffix = "_SitePoly")
Catch_List <- splitByAttributes(spdata = All_CatchPoly, attr = "Period", suffix = "_CatchPoly")
# convert spatial polygons dataframe to spatial points dataframe
coor = All_AggPoly@data[, c("East", "North")] #create separate dataframe of coordinates
rownames(coor) <- as.numeric(rownames(coor)) #make sure rownames match
All_AggPts <- SpatialPointsDataFrame(coor, All_AggPoly@data, match.ID = TRUE) #convert to points
proj4string(All_AggPts) <- CRS("+proj=utm +zone=14 +datum=NAD83 +units=m +no_defs") #set CRS
All_AggPts = as(st_make_valid(st_as_sf(All_AggPts)), "Spatial") #make sure geometry is valid
All_AggPts <- All_AggPts[order(All_AggPts$PeriodNum), ] #reorder by period
# Split points by Phase, saved as list of spatial points dataframes
Pts_List <- splitByAttributes(spdata = All_AggPts, attr = "Period", suffix = "_Pts")
Classifying the hierarchical levels of settlement systems from archaeological data can be notoriously arbitrary without more contextual information. The main issues are why one set of size breaks is better than another, how many levels to include, and the temptation to ideosyncratic/unsystematic/non-reproducible human choices that vary between periods/cases/regions.
To get around these issues, I employ a systematic and reproducible two-part methodology. First, the number of hierarchical levels is chosen as the rounded mean of several different established methods of computing the number of classes (as opposed to partitioning the class intervals themselves) for a univariate distribution such as settlement population sizes. The methods that performed the best on our right-skew city size distributions are:
| PeriodNum | Period | Breaks | NumClasses |
|---|---|---|---|
| 1 | EF | 9, 200, 299 | 2 |
| 2 | EF_MF | 68, 120, 171 | 2 |
| 3 | MF | 9, 100, 300, 1000, 3000, 3785 | 5 |
| 4 | MF_LF | 12, 150, 400, 800, 1347 | 4 |
| 5 | LF | 9, 100, 800, 3500, 7000, 8192 | 5 |
| 6 | LF_TF | 11, 200, 1000, 2500, 6000, 8137 | 5 |
| 7 | TF | 8, 250, 1000, 2000, 5000, 6689 | 5 |
| 8 | TF_CL | 7, 100, 200, 400, 550 | 4 |
| 9 | CL | 9, 100, 300, 600, 1500, 4451 | 5 |
| 10 | CL_ET | 21, 300, 2500, 4927 | 3 |
| 11 | ET | 9, 125, 350, 1250, 3000, 7500, 9637 | 6 |
| 12 | ET_LTAzI | 9, 100, 300, 600, 2000, 3668 | 5 |
| 13 | LTAzI | 9, 200, 500, 1250, 2500, 7500, 9356 | 6 |
| 14 | LTAzI_EA | 5, 100, 500, 2000, 7000, 12500, 14908 | 6 |
| 15 | EA | 4, 150, 600, 1250, 3000, 8000, 15477 | 6 |
| 16 | EA_LA | 13, 200, 650, 2000, 10000, 20000, 28282 | 6 |
| 17 | LA | 2, 250, 750, 2000, 4000, 8000, 13560 | 6 |
All four of these were performed on the population data of each period, and the row-wise mean and median (rounded to the nearest integer) were calculated.
Manual <- ManualHierClasses$NumClasses
Sturges <- NA
Scott <- NA
HeadTails <- NA
for (i in 1:length(Poly_List)) {
Sturges[i] <- nclass.Sturges(Poly_List[[i]]$Population.s2)
Scott[i] <- nclass.scott(Poly_List[[i]]$Log_Population.s2)
HTbreaks <- classIntervals(Poly_List[[i]]$Population.s2, style = "headtails")
HeadTails[i] <- length(HTbreaks$brks) - 1
}
levels <- data.frame(PeriodNum = ManualHierClasses$PeriodNum, Period = ManualHierClasses$Period,
Sturges = Sturges, Scott = Scott, HeadTails = HeadTails, Manual = Manual)
levels <- levels %>%
rowwise() %>%
mutate(Median = round(median(c(Sturges, HeadTails, Manual)), 0), Mean = round(mean(c(Sturges,
HeadTails, Manual)), 0), ) %>%
ungroup()
knitr::kable(levels, "pipe", align = "c")
| PeriodNum | Period | Sturges | Scott | HeadTails | Manual | Median | Mean |
|---|---|---|---|---|---|---|---|
| 1 | EF | 4 | 2 | 2 | 2 | 2 | 3 |
| 2 | EF_MF | 3 | 1 | 2 | 2 | 2 | 2 |
| 3 | MF | 6 | 3 | 3 | 5 | 5 | 5 |
| 4 | MF_LF | 6 | 3 | 3 | 4 | 4 | 4 |
| 5 | LF | 8 | 5 | 3 | 5 | 5 | 5 |
| 6 | LF_TF | 6 | 4 | 3 | 5 | 5 | 5 |
| 7 | TF | 8 | 5 | 3 | 5 | 5 | 5 |
| 8 | TF_CL | 6 | 3 | 3 | 4 | 4 | 4 |
| 9 | CL | 8 | 6 | 4 | 5 | 5 | 6 |
| 10 | CL_ET | 6 | 4 | 4 | 3 | 4 | 4 |
| 11 | ET | 7 | 5 | 4 | 6 | 6 | 6 |
| 12 | ET_LTAzI | 7 | 4 | 4 | 5 | 5 | 5 |
| 13 | LTAzI | 9 | 9 | 4 | 6 | 6 | 6 |
| 14 | LTAzI_EA | 8 | 6 | 4 | 6 | 6 | 6 |
| 15 | EA | 9 | 8 | 4 | 6 | 6 | 6 |
| 16 | EA_LA | 9 | 7 | 5 | 6 | 6 | 7 |
| 17 | LA | 10 | 13 | 5 | 6 | 6 | 7 |
rm(Manual, Sturges, Scott, HeadTails, HTbreaks)
Surprisingly, the median of the four methods is very close to the “Manual” archaeological reasoning method undertaken by myself – only differing by 1 for a single period (otherwise identical). The difference between the mean and the median is also quite small. More specifically, the mean is higher than the median by 1 for four periods (otherwise identical).
The Heads/Tails method has a very low sensitivity to such changes overall by design. Because Heads/Tails splits progressively down the distribution by cutting at the mean of the left tail, the increasing number of classes only reflects the magnitude of right-skew (completely insensitive to the number + variability of cases in the left tail). By contrast, Sturges’ and Scott’s methods are especially responsive to the growing ranges of the distributions, estimating systematically much higher numbers of classes for periods with a large number+range of settlements in a wide-ranging hierarchy (and vice versa). As such, they are both very sensitive to changes in the distribution as a whole.
In this sense, the mean and median central tendencies nicely balance the extremes of the three systematic methods. The near identity of the mean and median, and their close similarity to the independently estimated “Manual archaeological breaks reasoning” method, both reccomend this middling set of metrics. I think that the mean performs the best because it rounds upwards for periods where there is a high right-skew. Whereas my manual classifications were cautious not to over-split the settlements in the fat right tail, the higher mean seems to indicate that my right tail bin sizes were too wide (i.e. they deserved to be subdivided in some cases, which is being picked up in the mean because of the suystematic shifts in the other metrics). I have therefore chosen to use the mean as the number of hierarchical levels to classify for each period.
The “classInt” R Package (Bivand, 2022) provides a number of functions for classifying the univariate settlement size (population) hierarchy into the number of groups specified above (i.e. the integer-rounded mean of the four methods). Of these methods, some are not appropriate for a right-skew distribution, and others don’t enable us to specify the number of classes. The best performing methods are kmeans, fisher and jenks. All others leave almost the entire fat left tail of the distribution unpartitioned.
## Committee Member: 1(1) 2(1) 3(1) 4(1) 5(1) 6(1) 7(1) 8(1) 9(1) 10(1)
## Computing Hierarchical Clustering
## Committee Member: 1(1) 2(1) 3(1) 4(1) 5(1) 6(1) 7(1) 8(1) 9(1) 10(1)
## Computing Hierarchical Clustering
These two periods are also representative in terms of their mixed geographical distribution of size classes across the region, further cross-validating the Kmeans settlement hierarchy class intervals.
Now we can classify all periods using this method
for (i in 1:length(Poly_List)) {
nc = levels$Mean[i]
kmbrks <- classIntervals(Poly_List[[i]]$Population.s2, style = "kmeans",
n = nc)
Poly_List[[i]]$SetHierLevel <- cut(Poly_List[[i]]$Population.s2, kmbrks$brks,
labels = FALSE, include.lowest = TRUE)
}
PropMax = scale = settlement value / max settlement value Pop size with respect to max of whole region.
Pop Rank = Overall rank in settlement hierarchy
sitepoly.names <- names(Poly_List)
Poly_List2 <- list() #create output list
for (i in 1:length(Catch_List)) {
tmp.p <- Poly_List[[i]] #define site polys as temp object
tmp.p@data$Pop_PropMax <- tmp.p@data$Population.s2/(max(tmp.p@data$Population.s2,
na.rm = T))
tmp.p@data$Pop_Rank <- rank(-tmp.p@data$Population.s2, na.last = "keep",
ties.method = "average")
if ((max(tmp.p@data$UrbanPop.s2, na.rm = T)) > 0) {
tmp.p@data$UrbanPop_PropMax <- tmp.p@data$UrbanPop.s2/(max(tmp.p@data$UrbanPop.s2,
na.rm = T))
} else {
tmp.p@data$UrbanPop_PropMax <- NA
}
if ((max(tmp.p@data$UrbanPop, na.rm = T)) > 0) {
tmp.p@data$UrbanPop_Rank <- rank(-tmp.p@data$UrbanPop.s2, na.last = "keep",
ties.method = "average")
} else {
tmp.p@data$UrbanPop_Rank <- NA
}
tmp.p@data$PopDens_PropMax <- tmp.p@data$PopDens.s2/(max(tmp.p@data$PopDens.s2,
na.rm = T))
tmp.p@data$PopDens_Rank <- rank(-tmp.p@data$PopDens.s2, na.last = "keep",
ties.method = "average")
tmp.p@data$UrbanScale_PropMax <- tmp.p@data$UrbanScale.s2/(max(tmp.p@data$UrbanScale.s2,
na.rm = T))
tmp.p@data$UrbanScale_Rank <- rank(-tmp.p@data$UrbanScale.s2, na.last = "keep",
ties.method = "average")
tmp.p@data <- tmp.p@data %>%
rowwise() %>%
mutate(UrbanPop_Rank = ifelse(is.na(UrbanPop_Rank), max(UrbanPop_Rank) +
1, UrbanPop_Rank), Pop_Rank = ifelse(is.na(Pop_Rank), max(Pop_Rank.s2) +
1, Pop_Rank), UrbanScale_Rank = ifelse(is.na(UrbanScale_Rank), max(UrbanScale_Rank) +
1, UrbanScale_Rank)) %>%
ungroup()
Poly_List2[[i]] <- tmp.p #save to output list
}
# rename list items
names(Poly_List2) <- sitepoly.names
Poly_List <- Poly_List2
rm(Poly_List2)
# resid_list <- list() gg_list <- list() coef_list <- list()
for (i in 1:length(Poly_List)) {
rr <- PopAreaResids(dat = Poly_List[[i]]@data)
# coef_list[[i]] <- rr[[1]] resid_list[[i]] <- rr[[2]]
Poly_List[[i]]$ScalResid <- rr[[2]]$Resid
Poly_List[[i]]$ScalSlope <- rr[[1]]$estimate[2]
Poly_List[[i]]$ScalInt <- rr[[1]]$estimate[1]
}
# resids <- resids[match(All_AggPoly$AggID,resids$AggID),] scaling <-
# do.call(rbind, coef_list) resids <- do.call(rbind, resid_list)
Standardized against global averages
The ‘location quotient’ (or ‘LQ’) is used in economic geography as an indicator of local concentration or specialization. It is a double intensive metric given by the equation
\[ LQ_{ij} = \frac{E_{ij}/E_{j}}{\sum E_{ij}/\sum E_{j}} \] where \(E_{j}\) is total employment in locality \(j\), and \(E_{ij}\) is employment in industry \(i\) in locality \(j\). The numerator is thus the fraction of local employment in industry \(i\), while he denominator is the the fraction of total regional employment in industry \(i\).
We can use LQ for things other than just employment, as it is a commonly used metric in other disciplines – it merely compares a local relative abundance to a wider/total relative abundance. Here, we can use it to evaluate
LQ is included in several R packages (Balland, 2017; e.g. Kalogirou, 2020; Wieland, 2019), but I have written my own function for it due to its simplicity.
LQ can be applied at any spatial scale comparing a locality to its wider region (e.g. settlements within a region, regions within regions, etc.). As such, the ‘Focal Location Quotient’ (FLQ) has been developed to look at how spatial neighborhoods of localities (defined by some spatial distance criteria around each locality) compare to the wider region (Cromley & Hanink, 2012). It is thus a kind of local spatial moving window metric. We can apply FLQ using the .
FLQ is implemented in the “lctools” R package (Kalogirou, 2020), but therein only allows you to use k nearest neighbors. As such, I have written my own functions that define our own neighborhood of settlements from the transport network cost-distances from script #4 (or cost-distanced from script #3) using either a threshold distance or k nearest neighbors.
## outlist <- list()
dthresh = 1 # threshold cost-distance (in hours) for local neighborhood calculations
for (i in 1:length(Poly_List)) {
m = TrsprtNet_CDmatList[[i]]
df = Poly_List[[i]]@data
## outdf <- df %>%
## select(AggID,AggSite,Period,PeriodNum,Population.s2,UrbanPop.s2,Q.Urb_rPert)
Poly_List[[i]]$LQ_UrbPop <- LQ(df = df, e = "UrbanPop.s2", E = "Population.s2")
Poly_List[[i]]$FLQ_UrbPop <- FLQ.dist(df = df, e = "UrbanPop.s2", E = "Population.s2",
m = m, r = dthresh)
Poly_List[[i]]$LQ_UrbOccuTime <- LQ(df = df, e = "UrbOccuTime", E = "OccuTime")
Poly_List[[i]]$FLQ_UrbOccuTime <- FLQ.dist(df = df, e = "UrbOccuTime", E = "OccuTime",
m = m, r = dthresh)
Poly_List[[i]]$LQ_UrbOccuInertia <- LQ(df = df, e = "UrbOccuInertia", E = "OccuInertia")
Poly_List[[i]]$FLQ_UrbOccuInertia <- FLQ.dist(df = df, e = "UrbOccuInertia",
E = "OccuInertia", m = m, r = dthresh)
# outdf$LQ_Pct_UrbdeltaPop12 <- LQ(df=df,e='Pct_UrbdeltaPop12',
# E='Pct_deltaPop12') outdf$FLQ_Pct_UrbdeltaPop12 <- FLQ.dist(df=df,
# e='Pct_UrbdeltaPop12', E='Pct_deltaPop12',m=m, r=dthresh)
# outdf$LQ_PopDens <- LQ(df=df, e='Population.s2', E='Area_ha')
# outdf$FLQ_PopDens <- FLQ.dist(df=df, e='Population.s2',
# E='Area_ha',m=m, r=dthresh) outdf$LQ_Catch_Popdens <- LQ(df=df,
# e='Population.s2', E='Catchment_ha') outdf$FLQ_Catch_Popdens <-
# FLQ.dist(df=df, e='Population.s2', E='Catchment_ha',m=m, r=dthresh)
# outdf$LQ_Urb_Abs_rPert <- LQ(df=df, e='Urb_Abs_rPert',
# E='Abs_rPert') outdf$FLQ_Urb_Abs_rPert <- FLQ.dist(df=df,
# e='Urb_Abs_rPert', E='Abs_rPert', m=m, r=dthresh)
# outdf$LQ_Q.Urb_rPert <- LQ(df=df, e='Q.Urb_rPert', E='Q.rPert')
# outdf$FLQ_Q.Urb_rPert <- FLQ.dist(df=df, e='Q.Urb_rPert',
# E='Q.rPert',m=m, r=dthresh)
Poly_List[[i]]$LQ_PopFwCont <- LQ(df = df, e = "FwOvlp.Pop", E = "Population.s2")
Poly_List[[i]]$LQ_PopBwCont <- LQ(df = df, e = "BwOvlp.Pop", E = "Population.s2")
Poly_List[[i]]$LQ_AreaFwCont <- LQ(df = df, e = "FwOvlp.Area", E = "Area_ha")
Poly_List[[i]]$LQ_AreaBwCont <- LQ(df = df, e = "BwOvlp.Area", E = "Area_ha")
## outlist[i] <- outdf
}
The spGini metric is the ratio of the gini among the focal set of spatial neighbors around agiven site (here a 1 hr cost dist threshold) to the global Gini. Thus, GiniSp = Local / Global.
The spatial Gini Index is implemented in the “lctools” R package (Kalogirou, 2020), but therein only allows you to use k nearest neighbors. As such, I have written my own functions that define our own neighborhood of settlements from the transport network cost-distances from script #4 (or cost-distanced from script #3) using either a threshold distance or k nearest neighbors.
dthresh = 1 # threshold cost-distance (in hours) for local neighborhood calculations
for (i in 1:length(Poly_List)) {
m = TrsprtNet_CDmatList[[i]]
df = Poly_List[[i]]@data
Poly_List[[i]]$spGini_Pop <- GiniSp.dist(df = df, var = "Population.s2",
m = m, r = dthresh)
Poly_List[[i]]$spGini_UrbPop <- GiniSp.dist(df = df, "UrbanPop.s2", m = m,
r = dthresh)
Poly_List[[i]]$spGini_OccuTime <- GiniSp.dist(df = df, "OccuTime", m = m,
r = dthresh)
Poly_List[[i]]$spGini_UrbOccuTime <- GiniSp.dist(df = df, "UrbOccuTime",
m = m, r = dthresh)
Poly_List[[i]]$spGini_OccuInertia <- GiniSp.dist(df = df, "OccuInertia",
m = m, r = dthresh)
Poly_List[[i]]$spGini_UrbOccuInertia <- GiniSp.dist(df = df, "UrbOccuInertia",
m = m, r = dthresh)
Poly_List[[i]]$spGini_Pct_deltaPop12 <- GiniSp.dist(df = df, "Pct_deltaPop12",
m = m, r = dthresh)
Poly_List[[i]]$spGini_Pct_UrbdeltaPop12 <- GiniSp.dist(df = df, "Pct_UrbdeltaPop12",
m = m, r = dthresh)
Poly_List[[i]]$spGini_Area_ha <- GiniSp.dist(df = df, "Area_ha", m = m,
r = dthresh)
Poly_List[[i]]$spGini_Catchment_ha <- GiniSp.dist(df = df, "Catchment_ha",
m = m, r = dthresh)
Poly_List[[i]]$spGini_Abs_rPert <- GiniSp.dist(df = df, "Abs_rPert", m = m,
r = dthresh)
Poly_List[[i]]$spGini_Urb_Abs_rPert <- GiniSp.dist(df = df, "Urb_Abs_rPert",
m = m, r = dthresh)
Poly_List[[i]]$spGini_Q.rPert <- GiniSp.dist(df = df, "Q.rPert", m = m,
r = dthresh)
Poly_List[[i]]$spGini_Q.Urb_rPert <- GiniSp.dist(df = df, "Q.Urb_rPert",
m = m, r = dthresh)
Poly_List[[i]]$spGini_FwOvlp.Pop <- GiniSp.dist(df = df, "FwOvlp.Pop", m = m,
r = dthresh)
Poly_List[[i]]$spGini_BwOvlp.Pop <- GiniSp.dist(df = df, "BwOvlp.Pop", m = m,
r = dthresh)
Poly_List[[i]]$spGini_FwOvlp.Area <- GiniSp.dist(df = df, "FwOvlp.Area",
m = m, r = dthresh)
Poly_List[[i]]$spGini_BwOvlp.Area <- GiniSp.dist(df = df, "BwOvlp.Area",
m = m, r = dthresh)
Poly_List[[i]]$spGini_PopPressure <- GiniSp.dist(df = df, "PopPressure",
m = m, r = dthresh)
Poly_List[[i]]$spGini_centr_avg <- GiniSp.dist(df = df, "centr_avg", m = m,
r = dthresh)
Poly_List[[i]]$spGini_centr_deg <- GiniSp.dist(df = df, "centr_deg", m = m,
r = dthresh)
Poly_List[[i]]$spGini_centr_btw <- GiniSp.dist(df = df, "centr_btw", m = m,
r = dthresh)
Poly_List[[i]]$spGini_centr_eig <- GiniSp.dist(df = df, "centr_eig", m = m,
r = dthresh)
Poly_List[[i]]$spGini_centr_clos <- GiniSp.dist(df = df, "centr_clos", m = m,
r = dthresh)
Poly_List[[i]]$spGini_centr_hrmo <- GiniSp.dist(df = df, "centr_hrmo", m = m,
r = dthresh)
Poly_List[[i]]$spGini_centr_hub <- GiniSp.dist(df = df, "centr_hub", m = m,
r = dthresh)
Poly_List[[i]]$spGini_centr_auth <- GiniSp.dist(df = df, "centr_auth", m = m,
r = dthresh)
Poly_List[[i]]$spGini_centr_pgrk <- GiniSp.dist(df = df, "centr_pgrk", m = m,
r = dthresh)
Poly_List[[i]]$spGini_trans_loc <- GiniSp.dist(df = df, "trans_loc", m = m,
r = dthresh)
Poly_List[[i]]$spGini_SetHierLevel <- GiniSp.dist(df = df, "SetHierLevel",
m = m, r = dthresh)
}
# Convert lists of period-wise sites/catchments to single SPDF objects
All_Agg_SitePoly <- do.call(rbind, Poly_List)
All_Agg_CatchPoly <- do.call(rbind, Catch_List)
# variables from site data that needs transfer over to catchment areas
colz1 = setdiff(colnames(All_Agg_SitePoly@data),colnames(All_Agg_CatchPoly@data))
# variables from catchment areas that needs transfer over to site data
colz2 = setdiff(colnames(All_Agg_CatchPoly@data),colnames(All_Agg_SitePoly@data))
#reorder the data to match
All_Agg_SitePoly <- All_Agg_SitePoly[order(All_Agg_SitePoly$AggSite),]
All_Agg_CatchPoly <- All_Agg_CatchPoly[order(All_Agg_CatchPoly$AggSite),]
#check to see that the two datasets are in the right order
#identical(All_Agg_SitePoly@data$AggSite, All_Agg_CatchPoly@data$AggSite)
# Site data to catchment areas
Site_to_Catch <- All_Agg_SitePoly@data %>% select(!!!syms(colz1))
All_Agg_CatchPoly@data <- cbind(All_Agg_CatchPoly@data,Site_to_Catch)
# catchment area data to sites
Catch_to_Site <- All_Agg_CatchPoly@data %>% select(!!!syms(colz2))
All_Agg_SitePoly@data <- cbind(All_Agg_SitePoly@data,Catch_to_Site)
#Reorganize the data
ordering <- c(
#ID VARIABLES
"AggSite","AggID","Site","East","North","SurvReg","Number","CerPhase","Period",
"PeriodType","PeriodLength","PeriodNum","PeriodBegin","PeriodEnd",
"OccSeqLoc","OccSeqLoc.Sites","SubOccSeqLoc","SubOccSeqLoc.Sites",
"ComponentNum", "ComponentSites",
#CHRONOLOGICAL VARIABLES
"PeriodInterval", "PeriodBegin", "PeriodBegin.era", "PeriodMidpoint",
"PeriodMidpoint.era", "PeriodEnd", "PeriodEnd.era", "PeriodLength",
#OCCUPATION VARIABLES (COUNTS)
"Occ.EF","Occ.EF_MF","Occ.MF","Occ.MF_LF","Occ.LF","Occ.LF_TF",
"Occ.TF","Occ.TF_CL","Occ.CL","Occ.CL_ET","Occ.ET","Occ.ET_LTAzI",
"Occ.LTAzI","Occ.LTAzI_EA","Occ.EA","Occ.EA_LA","Occ.LA","Occ.TOT",
#SUBOCCUPATION VARIABLES (COUNTS)
"SubOcc.EF","SubOcc.EF_MF","SubOcc.MF","SubOcc.MF_LF","SubOcc.LF",
"SubOcc.LF_TF","SubOcc.TF","SubOcc.TF_CL","SubOcc.CL","SubOcc.CL_ET",
"SubOcc.ET","SubOcc.ET_LTAzI","SubOcc.LTAzI","SubOcc.LTAzI_EA",
"SubOcc.EA","SubOcc.EA_LA","SubOcc.LA","SubOcc.TOT",
#SITE AREA AND OCCUPATIONAL DENSITY VARS
"Area_ha","Perim_m2","SherdDens","Tot.Assemb","FwOvlp.Assemb",
"BwOvlp.Assemb", "Net.Assemb",
#STEP #2 DEMOGRAPHIC VARIABLES
"Population.s2","Log_Population.s2", "ApportAssemb", "PopDens.s2",
"UrbanScale.s2", "UrbanPop.s2","RuralPop.s2", "PctUrban.s2","PctRural.s2",
"Prior", "Observed", "MeanOccuProb",
#STEP #2 DEMOGRAPHIC RATES
"Pct_deltaPop12", "Pct_deltaPop01", "r12_Pert", "r01_Pert",
"r23_Pert", "rPert", "rPert0", "rPert2",
"Pct_UrbdeltaPop12", "Pct_UrbdeltaPop01", "Urb_r12_Pert", "Urb_r01_Pert",
"Urb_r23_Pert", "Urb_rPert", "Urb_rPert0", "Urb_rPert2",
"Abs_rPert","RS_rPert","LogRS_rPert","LogAbs_rPert","Q.rPert","Q.Abs_rPert",
"Q.RS_rPert","Q.LogRS_rPert","Q.LogAbs_rPert","Urb_Abs_rPert",
"Urb_RS_rPert","Urb_LogRS_rPert","Urb_LogAbs_rPert","Q.Urb_rPert","Q.Urb_Abs_rPert",
"Q.Urb_RS_rPert","Q.Urb_LogRS_rPert","Q.Urb_LogAbs_rPert",
"Qp.rPert","Qp.Abs_rPert","Qp.RS_rPert","Qp.LogRS_rPert","Qp.LogAbs_rPert",
#STEP #1 DEMOGRAPHIC VARIABLES
"Population.s1","PopDens.s1","UrbanScale.s1","UrbanPop.s1","RuralPop.s1",
"PctUrban.s1","PctRural.s1",
#CONTINUITY VARIABLES
"AreaBwCont","AreaFwCont","PopBwCont","PopFwCont","FwOvlp.Sites",
"FwOvlp.Area","FwOvlp.Pop","BwOvlp.Sites","BwOvlp.Area","BwOvlp.Pop",
#PERSISTENCE VARIABLES
"Found","FoundInit","Abandon","Persist","DewarType",
"OccuTime","OccuInertia","UrbOccuTime","UrbOccuInertia",
#STEP #4 TRANSPORT NETWORK VARIABLES
"TranspDens.pct", "TranspDens.rank","centr_deg","centr_btw","centr_eig",
"centr_clos","centr_hrmo","centr_hub","centr_auth","centr_pgrk",
"centr_deg_n","centr_btw_n","centr_eig_n","centr_clos_n","centr_hrmo_n",
"centr_hub_n","centr_auth_n","centr_avg","trans_loc","trans_locavg",
"density","connectiv","trans_glob","cntrlz_deg","cntrlz_btw","cntrlz_eig",
"cntrlz_clo","cntrlz_hub","cntrlz_auth","cntrlz_deg_n","cntrlz_btw_n",
"cntrlz_eig_n","cntrlz_clo_n","cntrlz_hub_n","cntrlz_auth_n","cntrlz_avg",
#STEP #4 CATCHMENT AREA AND POP DENSITY VARIABLES
"Catchment_ha", "CatchmentBeyond_ha", "Catch_Popdens",
"Catch_Popdens_PropMax","Catch_Popdens_Rank","CatchB_Popdens",
"CatchB_Popdens_PropMax", "CatchB_Popdens_Rank",
#STEP #5 CATCHMENT ENVIRONMENT/TOPOGRAPHY VARIABLES
"NPP.tot","NPP.avg","EZ.avg","EZ.sd","TRI.tot","TRI.avg","IrrigPot.tot",
"IrrigPot.avg","WetAgPot.tot","WetAgPot.avg","IntnsCost.tot",
"IntnsCost.avg","IntnsCostNW.tot","IntnsCostNW.avg","AGPot.tot",
"AGPot.avg","AGPotNW.tot","AGPotNW.avg","PopPressure","PopPressureNW",
"ErosionPot.tot","ErosionPot.avg",
#STEP #6 SETTLEMENT HIERARCHY DEMOG VARS
"SetHierLevel","ScalResid","ScalSlope","ScalInt","Pop_PropMax","Pop_Rank",
"UrbanPop_PropMax","UrbanPop_Rank","PopDens_PropMax","PopDens_Rank",
"UrbanScale_PropMax","UrbanScale_Rank",
#STEP #6 SETTLEMENT HIERARCHY/DEMOG LOCATION QUOTIENTS
"LQ_UrbPop","FLQ_UrbPop","LQ_UrbOccuTime","FLQ_UrbOccuTime","LQ_UrbOccuInertia",
"FLQ_UrbOccuInertia","LQ_PopFwCont","LQ_PopBwCont","LQ_AreaFwCont","LQ_AreaBwCont",
#"LQ_Pct_UrbdeltaPop12","FLQ_Pct_UrbdeltaPop12","LQ_PopDens",
#"FLQ_PopDens","LQ_Catch_Popdens","FLQ_Catch_Popdens","LQ_Urb_Abs_rPert",
#"FLQ_Urb_Abs_rPert","LQ_Q.Urb_rPert","FLQ_Q.Urb_rPert",
#STEP #6 SETTLEMENT HIERARCHY/DEMOG SPATIAL GINIS
"spGini_Pop","spGini_UrbPop","spGini_OccuTime","spGini_UrbOccuTime",
"spGini_OccuInertia","spGini_UrbOccuInertia","spGini_Pct_deltaPop12",
'spGini_Pct_UrbdeltaPop12',"spGini_Area_ha","spGini_Catchment_ha",
"spGini_Abs_rPert","spGini_Urb_Abs_rPert","spGini_Q.rPert","spGini_Q.Urb_rPert",
"spGini_FwOvlp.Pop","spGini_BwOvlp.Pop","spGini_FwOvlp.Area","spGini_BwOvlp.Area",
"spGini_PopPressure",
#STEP #6 TRANSPORT NETWORK SPATIAL GINIS
"spGini_centr_avg","spGini_centr_deg","spGini_centr_btw",
"spGini_centr_eig","spGini_centr_clos","spGini_centr_hrmo","spGini_centr_hub",
"spGini_centr_auth","spGini_centr_pgrk","spGini_trans_loc","spGini_SetHierLevel",
#SURVEY METADATA
"M_Sites","M_SiteCode","M_SiteName","M_FieldSite.Region",
"M_FieldSite.Period","M_SurveyYearNumber","M_Supervisor","M_Map",
#OLD tDAR BOM SURVEY VARIABLES
"O_Elev","O_ElevMed","O_ElevMin","O_ElevMax","O_EZcode",
"O_EnvironmentalZone","O_Soil","O_SoilMed","O_SoilMin","O_SoilMax",
"O_Erosion","O_ErosionMed","O_ErosionMin","O_ErosionMax","O_ModernUse",
"O_ModernSettlement","O_Rainfall","O_Area","O_MoundDomestic",
"O_MoundCeremonial","O_MoundQuestionable","O_MoundTotal",
"O_MoundRecorded","O_DMoundArea","O_Architecture","O_TerraceConfidence",
"O_TerraceExtent","O_Sherd","O_SherdMed","O_SherdMin","O_SherdMax",
"O_Rubble","O_RubbleMed","O_RubbleMin","O_RubbleMax","O_Population",
"O_PopMin","O_PopMax","O_PopMethod","O_stcode","O_SiteType",
"O_SubPeriod1","O_SubPeriod2","O_OccEF","O_OccMF","O_OccLF","O_OccTF",
"O_OccCL","O_OccEC","O_OccMC","O_OccLC","O_OccET","O_OccLT","O_OccAZ",
"O_OccEA","O_OccLA","O_OccTot","O_OccSeqLoc","O_SubOc1","O_SubOc2",
"O_PdDupSite","O_Group","O_Comments")
#Make sure everything is kosher
#setdiff(colnames(All_Agg_CatchPoly@data),ordering)
#setdiff(colnames(All_Agg_SitePoly@data),ordering)
#setdiff(ordering,colnames(All_Agg_CatchPoly@data))
#setdiff(ordering,colnames(All_Agg_SitePoly@data))
# Reorder the data for both sites and catchment areas
All_Agg_SitePoly@data <- All_Agg_SitePoly@data %>% select(!!!syms(ordering))
All_Agg_CatchPoly@data <- All_Agg_CatchPoly@data %>% select(!!!syms(ordering))
# AggSite polygons
writeOGR(All_Agg_SitePoly, paste0(wd$data_p, "SBOM_Agg_SitePoly6.gpkg"), "SBOM_Agg_SitePoly6",
driver = "GPKG", overwrite_layer = TRUE)
# Catchment areas
writeOGR(All_Agg_CatchPoly, paste0(wd$data_p, "SBOM_Agg_CatchPoly6.gpkg"), "SBOM_Agg_CatchPoly6",
driver = "GPKG", overwrite_layer = TRUE)